import json
from fish_tool import sys_tool, logs

# Input/output locations used by this script (absolute paths on the author's machine).
# data_path:  source records; read by create_手工标注() and passed to merge() as the
#             original data that carries the 'sent_token' field.
# label_path: file written by create_手工标注() and read back by merge() as the
#             labelled data (presumably after manual annotation — confirm).
# merge_path: destination file that merge() writes the merged records to.
data_path = 'E:/code/data/LIC2022-百度比赛/百度-2022语言与智能技术竞赛：情感可解释评测/senti_ch_part1.txt'
label_path = 'E:/code/data/LIC2022-百度比赛/百度-2022语言与智能技术竞赛：情感可解释评测/senti_ch_part1_手工标注.txt'
merge_path = 'E:/code/data/LIC2022-百度比赛/百度-2022语言与智能技术竞赛：情感可解释评测/senti_ch_part1_标注_merge.txt'


def create_手工标注():
    """Create the labelling file from the source data file.

    Loads the records at the module-level ``data_path``, moves each
    record's ``sent_token`` value under the key ``rationale`` (the
    ``sent_token`` key is removed), and writes the records to the
    module-level ``label_path``.

    Raises:
        KeyError: if a record has no ``sent_token`` key.
    """
    records = sys_tool.read_jsonlist(data_path)
    for record in records:
        # Rename the field in place: take the value out, then store it
        # under the new key.
        tokens = record.pop('sent_token')
        record['rationale'] = tokens
    sys_tool.write_jsonlist(records, label_path)


def merge(data_path, label_path, save_path):
    """Merge labelled rationales into the matching source records.

    Label records are read from ``label_path``; only those whose
    ``rationale`` contains the "￥" marker are kept, indexed by ``id``
    (a later duplicate id overrides an earlier one).  Source records are
    then read from ``data_path``; each one whose ``id`` has a kept
    rationale gets that rationale stored under ``rationale`` and is
    written, in source order, to ``save_path``.  Source records without a
    kept rationale are dropped from the output.

    Args:
        data_path: Path of the source records (each with ``id`` and
            ``sent_token``).
        label_path: Path of the labelled records (each with ``id`` and
            ``rationale``).
        save_path: Path the merged records are written to.

    Raises:
        ValueError: if a kept rationale and the record's ``sent_token``
            differ in length.  Nothing is written in that case.
    """
    label_data = sys_tool.read_jsonlist(label_path)
    # Only rationales containing the "￥" marker are used — presumably the
    # marker the annotator inserts to show a record was labelled (confirm).
    id__rationale = {
        doc['id']: doc['rationale']
        for doc in label_data
        if "￥" in doc['rationale']
    }

    save_data = []
    for doc in sys_tool.read_jsonlist(data_path):
        _id = doc['id']
        if _id not in id__rationale:
            continue
        rationale = id__rationale[_id]
        doc['rationale'] = rationale
        rationale_len = len(rationale)
        token_len = len(doc['sent_token'])
        if rationale_len != token_len:
            logs.print(f'{_id}  长度不一致')
            # Report which record failed and by how much, instead of an
            # opaque placeholder message.
            raise ValueError(
                f'id={_id!r}: rationale length {rationale_len} does not '
                f'match sent_token length {token_len}'
            )
        save_data.append(doc)
    sys_tool.write_jsonlist(save_data, save_path)


if __name__ == '__main__':
    # Script entry point: merge the labelled rationales into the source
    # records using the module-level paths.
    merge(data_path=data_path, label_path=label_path, save_path=merge_path)
